import numpy as np
import dask.dataframe as dd


# Open the semicolon-separated tax table as a lazy Dask dataframe.
# NOTE(review): blocksize is expressed in bytes, so 5 is extremely small --
# presumably chosen to force many partitions for the demo graph; confirm.
taxes = dd.read_csv("FY2016-STC-Category-Table.csv", sep=";", blocksize=5)

# Clean "Amount": strip commas, turn "X" entries into NaN, then cast to float.
amount_text = taxes["Amount"].str.replace(",", "")
taxes["Amount"] = amount_text.replace("X", np.nan).astype(float)

# Convert "Tax_Type" to a categorical with known categories before pivoting.
tax_type_cat = taxes["Tax_Type"].astype("category")
taxes["Tax_Type"] = tax_type_cat.cat.as_known()

# One row per Geo_Name, one column per Tax_Type, values taken from Amount.
pivot = taxes.pivot_table(index="Geo_Name", columns="Tax_Type", values="Amount")

# Keep only the rows whose "Property Taxes" value is not missing.
property_col = pivot["Property Taxes"]
has_property_info = pivot[~property_col.isna()].index
# The index is materialised here so it can be used as a concrete row selector.
pivot_clean = pivot.loc[has_property_info.compute()]

# Property taxes as a fraction of total taxes, per remaining row.
frac_property = pivot_clean["Property Taxes"] / pivot_clean["Total Taxes"]

# Rendering the graph needs graphviz (sudo apt-get install graphviz).
# Dask execution is lazy: the code above only builds a task graph that is
# executed later.
frac_property.visualize(filename="../10-property.svg", rankdir="LR")


